Note: 1. It is expected that low number of cells for DT condition. We have seen less cells after sorting.
knitr::opts_chunk$set(warning=FALSE, messgae=FALSE, fig.path='Figs/', results = "hide")
## fig.width=4, fig.height=4
system_dir = "/home/jyu/rstudio/"
working_dir = paste0(system_dir,"/")
# published_data_dir = paste0(system_dir,"/general_scripts/publised_datasets/")
# global_ref_dir =paste0(system_dir,"/general_scripts/Global_ref_data/")
# gsea_pathway_dir = paste0(system_dir,"/general_scripts/Global_ref_data/")
# source(paste0(global_ref_dir,"general_functions.R"))
library(Seurat)
tmp_count = Read10X(data.dir = paste0(working_dir,"/Dec2021_10x/DT/"))
DT_seurat = CreateSeuratObject(counts = tmp_count,
project = "DT")
print(paste0("number of cells: ", ncol(DT_seurat)))
[1] “number of cells: 1968”
rm(tmp_count)
tmp_count = Read10X(data.dir = paste0(working_dir,"/Dec2021_10x/IL/"))
IL_seurat = CreateSeuratObject(counts = tmp_count,
project = "IL")
print(paste0("number of cells: ", ncol(IL_seurat)))
[1] “number of cells: 7438”
rm(tmp_count)
tmp_count = Read10X(data.dir = paste0(working_dir,"/Dec2021_10x/WT/"))
WT_seurat = CreateSeuratObject(counts = tmp_count,
project = "WT")
print(paste0("number of cells: ", ncol(WT_seurat)))
[1] “number of cells: 5619”
rm(tmp_count)
mt% < 10%, 1000< nFeature_RNA < 6000
DT_seurat[["percent.mt"]] <- PercentageFeatureSet(DT_seurat, pattern = "^mt-")
VlnPlot(DT_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))
FeatureScatter(DT_seurat,feature2 = "nFeature_RNA", feature1 = "nCount_RNA")
DT_seurat = subset(DT_seurat,subset=percent.mt < 10)
DT_seurat = subset(DT_seurat, subset=nFeature_RNA >1000 & nFeature_RNA < 6000)
VlnPlot(DT_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))
print(paste0("number of cells after filtering: ", ncol(DT_seurat)))
[1] “number of cells after filtering: 1318”
mt% < 10%, 500< nFeature_RNA < 6000
IL_seurat[["percent.mt"]] <- PercentageFeatureSet(IL_seurat, pattern = "^mt-")
VlnPlot(IL_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))
FeatureScatter(IL_seurat,feature2 = "nFeature_RNA", feature1 = "nCount_RNA")
IL_seurat = subset(IL_seurat,subset=percent.mt < 10)
IL_seurat = subset(IL_seurat, subset=nFeature_RNA >500 & nFeature_RNA < 6000)
VlnPlot(IL_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))
print(paste0("number of cells after filtering: ", ncol(IL_seurat)))
[1] “number of cells after filtering: 4538”
mt% < 10%, 500< nFeature_RNA < 6000
WT_seurat[["percent.mt"]] <- PercentageFeatureSet(WT_seurat, pattern = "^mt-")
VlnPlot(WT_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))
FeatureScatter(WT_seurat,feature2 = "nFeature_RNA", feature1 = "nCount_RNA")
WT_seurat = subset(WT_seurat,subset=percent.mt < 10)
WT_seurat = subset(WT_seurat, subset=nFeature_RNA >500 & nFeature_RNA < 6000)
VlnPlot(WT_seurat,features = c("nCount_RNA","nFeature_RNA","percent.mt"))
print(paste0("number of cells after filtering: ", ncol(WT_seurat)))
[1] “number of cells after filtering: 3313”
CAR_seurat = merge(x=WT_seurat,y=c(IL_seurat,DT_seurat),add.cell.ids = c("WT", "IL","DT"), project = "Kanako_CAR")
### normalization and scaling
#normalize data
CAR_seurat = CAR_seurat %>% NormalizeData(., normalization.method = "LogNormalize", scale.factor = 10000)
# find variable genes
CAR_seurat <- FindVariableFeatures(CAR_seurat, selection.method = "vst", nfeatures = 2000)
# Identify the 10 most highly variable genes
# plot variable features with labels
LabelPoints(plot = VariableFeaturePlot(CAR_seurat),
points = head(VariableFeatures(CAR_seurat), 10),
repel = TRUE)
# scale data
CAR_seurat <- ScaleData(CAR_seurat, features = VariableFeatures(object = CAR_seurat))
# PCA
CAR_seurat <- RunPCA(CAR_seurat, features = VariableFeatures(object = CAR_seurat))
# Examine and visualize PCA results a few different ways
# print(CAR_seurat[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(CAR_seurat, dims = 1:2, reduction = "pca")
# DimPlot(CAR_seurat, reduction = "pca",dims = c(1,3))
DimPlot(CAR_seurat, reduction = "pca",dims = c(1,3),group.by = "orig.ident")
# determine dimensions
# NOTE: This process can take a long time for big datasets, comment out for expediency. More approximate techniques such as those implemented in ElbowPlot() can be used to reduce computation time
#CAR_seurat <- JackStraw(CAR_seurat, num.replicate = 100)
#CAR_seurat <- ScoreJackStraw(CAR_seurat, dims = 1:20)
### cluster and UMAP
#### search for the perfect clusters
set.seed(123)
# JackStrawPlot(CAR_seurat, dims = 1:15)
ElbowPlot(CAR_seurat)
# cluster cells
CAR_seurat <- FindNeighbors(CAR_seurat, dims = 1:30)
### use the resolution = 0.25
set.seed(123)
CAR_seurat <- FindClusters(CAR_seurat, resolution = 0.3)
#UMAP
CAR_seurat <- RunUMAP(CAR_seurat,
dims = 1:30,
n.neighbors = 30,
min.dist = 1,
n.components = 3,
spread = 3)
DimPlot(CAR_seurat,dims = c(1,3),label = TRUE)
# saveRDS(CAR_seurat,file = paste0(working_dir,"Dec10x_CAR_seurat.rds"))
# CAR_seurat = readRDS(file = paste0(working_dir,"Dec10x_CAR_seurat.rds"))
## same amount of cells per condition
cell_selected = CAR_seurat@meta.data[unlist(tapply(1:nrow(CAR_seurat@meta.data),CAR_seurat@meta.data$orig.ident,function(x) sample(x,1000))),]
CAR_seurat$orig.ident = factor(CAR_seurat$orig.ident,levels = c("WT","IL","DT"))
DimPlot(CAR_seurat[, rownames(cell_selected)], reduction = "umap",split.by = "orig.ident", dims = c(2,3),group.by = "seurat_clusters",label = TRUE)
VlnPlot(CAR_seurat,features = c("percent.mt"))
library(knitr)
kable(table(CAR_seurat@active.ident,CAR_seurat$orig.ident))
| WT | IL | DT | |
|---|---|---|---|
| 0 | 480 | 1147 | 237 |
| 1 | 297 | 691 | 405 |
| 2 | 540 | 355 | 45 |
| 3 | 437 | 414 | 70 |
| 4 | 292 | 464 | 152 |
| 5 | 354 | 295 | 91 |
| 6 | 258 | 294 | 130 |
| 7 | 153 | 440 | 23 |
| 8 | 70 | 172 | 37 |
| 9 | 105 | 112 | 39 |
| 10 | 187 | 30 | 29 |
| 11 | 36 | 34 | 29 |
| 12 | 34 | 21 | 13 |
| 13 | 29 | 24 | 2 |
| 14 | 21 | 17 | 12 |
| 15 | 9 | 18 | 0 |
| 16 | 11 | 10 | 4 |
cell_freq = table(CAR_seurat$orig.ident,CAR_seurat$seurat_clusters) %>% unlist() %>% as.data.frame()
ns <- table(organ = CAR_seurat$orig.ident, cell_type = CAR_seurat$seurat_clusters)
## remove cluster 1, 5, 6
# ns = ns[,c(1,3,4,5,8)]
fq <- prop.table(ns, 1) * 100
df <- as.data.frame(fq)
# df$cell_type = factor(df$cell_type,levels = c(2,0,4,3,7))
library(ggplot2)
ggplot(df,aes(x=Freq,y=organ,fill=cell_type))+
geom_bar(stat="identity",colour="black")+
# scale_fill_manual(values=c("wheat4","slategray4", "darkgreen", "lawngreen","black"))+
theme_classic()+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
rm(ns,fq,df)
# find DEGs
# Idents(CAR_seurat) = "sub1"
# CAR_seurat.markers <- FindAllMarkers(CAR_seurat, only.pos = FALSE, min.pct = 0.25, logfc.threshold = 0.5)
# write.csv(CAR_seurat.markers,file = paste0(working_dir,"Dec10x_degs.csv"))
# CAR_seurat.markers = read.csv(file = paste0(working_dir,"Dec10x_degs.csv"))
# pdf(file = paste0(working_dir,"10xrun_allcells_heatmap1.pdf"),height = 20,width = 12)
# DoHeatmap(CAR_seurat,
# features = CAR_seurat.markers %>% group_by(cluster) %>% dplyr::top_n(n = 10, wt = avg_log2FC) %>% .$gene,angle = 90)
# dev.off()
Idents(CAR_seurat) = "seurat_clusters"
CAR_seurat = RenameIdents(CAR_seurat,
"0" = "0:adi_progenitor",
"1" = "1:MSC",
"7" = "7:pre_adipocyte",
"3" = "3:OLC",
"4" = "4:OLC_progenitor",
"5" = "5:fibroblast",
"9" = "9:fibroblast",
"6" = "6:pericyte",
"10" = "10:chondrocyte",
"13" = "13:endothelial",
"12" = "12:prolifOCL",
"2" = "2:RBC",
"8" = "8:RBC",
"15" = "15:neutrophil",
"16" = "16:platelet",
"11" = "11:undefined",
"14" = "14:undefined")
#
DimPlot(CAR_seurat,dims = c(1,3),label = TRUE)
DimPlot(CAR_seurat,dims = c(1,2),label = TRUE)
DimPlot(CAR_seurat,dims = c(2,3),label = TRUE)
# The doughnut function permits to draw a donut plot
doughnut <-
function (x, labels = names(x), edges = 200, outer.radius = 0.8,
inner.radius=0.6, clockwise = FALSE,
init.angle = if (clockwise) 90 else 0, density = NULL,
angle = 45, col = NULL, border = FALSE, lty = NULL,
main = NULL, ...)
{
if (!is.numeric(x) || any(is.na(x) | x < 0))
stop("'x' values must be positive.")
if (is.null(labels))
labels <- as.character(seq_along(x))
else labels <- as.graphicsAnnot(labels)
x <- c(0, cumsum(x)/sum(x))
dx <- diff(x)
nx <- length(dx)
plot.new()
pin <- par("pin")
xlim <- ylim <- c(-1, 1)
if (pin[1L] > pin[2L])
xlim <- (pin[1L]/pin[2L]) * xlim
else ylim <- (pin[2L]/pin[1L]) * ylim
plot.window(xlim, ylim, "", asp = 1)
if (is.null(col))
col <- if (is.null(density))
palette()
else par("fg")
col <- rep(col, length.out = nx)
border <- rep(border, length.out = nx)
lty <- rep(lty, length.out = nx)
angle <- rep(angle, length.out = nx)
density <- rep(density, length.out = nx)
twopi <- if (clockwise)
-2 * pi
else 2 * pi
t2xy <- function(t, radius) {
t2p <- twopi * t + init.angle * pi/180
list(x = radius * cos(t2p),
y = radius * sin(t2p))
}
for (i in 1L:nx) {
n <- max(2, floor(edges * dx[i]))
P <- t2xy(seq.int(x[i], x[i + 1], length.out = n),
outer.radius)
polygon(c(P$x, 0), c(P$y, 0), density = density[i],
angle = angle[i], border = border[i],
col = col[i], lty = lty[i])
Pout <- t2xy(mean(x[i + 0:1]), outer.radius)
lab <- as.character(labels[i])
if (!is.na(lab) && nzchar(lab)) {
lines(c(1, 1.05) * Pout$x, c(1, 1.05) * Pout$y)
text(1.1 * Pout$x, 1.1 * Pout$y, labels[i],
xpd = TRUE, adj = ifelse(Pout$x < 0, 1, 0),
...)
}
## Add white disc
Pin <- t2xy(seq.int(0, 1, length.out = n*nx),
inner.radius)
polygon(Pin$x, Pin$y, density = density[i],
angle = angle[i], border = border[i],
col = "white", lty = lty[i])
}
title(main = main, ...)
invisible(NULL)
}
Idents(CAR_seurat) = "seurat_clusters"
# msc_ocl_seurat = subset(msc_ocl_seurat,idents=c("0:MSC","1:MSC","3:OCL","4:OCL","7:MSC"))
msc_ocl_seurat = subset(CAR_seurat,idents=c(0,1,3,4,7))
DimPlot(msc_ocl_seurat,dims = c(1,2), label = TRUE)
DimPlot(msc_ocl_seurat,dims = c(1,3), label = TRUE)
DimPlot(msc_ocl_seurat,dims = c(2,3), label = TRUE)
msc_ocl_seurat = RunUMAP(msc_ocl_seurat,
dims = 1:30,
n.neighbors = 30,
min.dist = 0.001,
n.components = 3,
spread = 1,
a = 1,
b = 0.95)
## 13:03:39 Read 5702 rows and found 30 numeric columns
## 13:03:39 Using Annoy for neighbor search, n_neighbors = 30
## 13:03:39 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:03:40 Writing NN index file to temp file /tmp/RtmpHbdY32/file25085036ca16
## 13:03:40 Searching Annoy index using 1 thread, search_k = 3000
## 13:03:42 Annoy recall = 100%
## 13:03:43 Commencing smooth kNN distance calibration using 1 thread
## 13:03:45 Initializing from normalized Laplacian + noise
## 13:03:45 Commencing optimization for 500 epochs, with 246142 positive edges
## 13:03:54 Optimization finished
DimPlot(msc_ocl_seurat,dims = c(1,2), label = TRUE)
DimPlot(msc_ocl_seurat,dims = c(1,3), label = TRUE)
DimPlot(msc_ocl_seurat,dims = c(1,3), label = TRUE,split.by = "orig.ident")
DimPlot(msc_ocl_seurat,dims = c(2,3), label = TRUE)
FeaturePlot(msc_ocl_seurat,features = c("Adipoq","Lpl","Apoe","Cxcl12"),dims = c(1,3),ncol = 1)
# saveRDS(msc_ocl_seurat,paste0(working_dir,"msc-ocl_seurat_umap0.95.rds"))
# msc_ocl_seurat = readRDS(paste0(working_dir,"msc-ocl_seurat_umap0.95.rds"))
Idents(msc_ocl_seurat) = "seurat_clusters"
msc_ocl_seurat = RenameIdents(msc_ocl_seurat,
"1" = "1:MSC",
"0" = "0:adi_progenitor",
"7" = "7:pre_adipocyte",
"4" = "4:OLC_progenitor",
"3" = "3:OLC")
DimPlot(msc_ocl_seurat,dims = c(1,3),split.by = "orig.ident")
sessionInfo()